path: root/src/shader_recompiler/ir_opt/vendor_workaround_pass.cpp
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "shader_recompiler/frontend/ir/basic_block.h"
#include "shader_recompiler/frontend/ir/ir_emitter.h"
#include "shader_recompiler/frontend/ir/value.h"
#include "shader_recompiler/ir_opt/passes.h"

namespace Shader::Optimization {

namespace {
void AddingByteSwapsWorkaround(IR::Block& block, IR::Inst& inst) {
    /*
     * Workaround for an NVIDIA bug seen in Super Mario RPG
     *
     * We are looking for this pattern:
     *   %lhs_bfe = BitFieldUExtract %factor_a, #0, #16
     *   %lhs_mul = IMul32 %lhs_bfe, %factor_b           // optional; may be absent
     *   %lhs_shl = ShiftLeftLogical32 %lhs_mul, #16
     *   %rhs_bfe = BitFieldUExtract %factor_a, #16, #16
     *   %result  = IAdd32 %lhs_shl, %rhs_bfe
     *
     * And replacing the IAdd32 with a BitwiseOr32
     *   %result  = BitwiseOr32 %lhs_shl, %rhs_bfe
     *
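     * The replacement is valid because %lhs_shl has its low 16 bits cleared by the
     * shift and %rhs_bfe is a zero-extended 16-bit field, so the two operands share
     * no set bits and BitwiseOr32 yields the same value as IAdd32.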
     */
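    // Match the shifted low half (lhs) and the extracted high half (rhs) feeding the IAdd32.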
    IR::Inst* const lhs_shl{inst.Arg(0).TryInstRecursive()};
    IR::Inst* const rhs_bfe{inst.Arg(1).TryInstRecursive()};
    if (!lhs_shl || !rhs_bfe) {
        return;
    }
    if (lhs_shl->GetOpcode() != IR::Opcode::ShiftLeftLogical32 ||
        lhs_shl->Arg(1) != IR::Value{16U}) {
        return;
    }
    if (rhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract || rhs_bfe->Arg(1) != IR::Value{16U} ||
        rhs_bfe->Arg(2) != IR::Value{16U}) {
        return;
    }
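    // The low half may pass through an IMul32 before the shift; accept the extract directly
    // when the multiply is absent.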
    IR::Inst* const lhs_mul{lhs_shl->Arg(0).TryInstRecursive()};
    if (!lhs_mul) {
        return;
    }
    const bool lhs_mul_optional{lhs_mul->GetOpcode() == IR::Opcode::BitFieldUExtract};
    if (lhs_mul->GetOpcode() != IR::Opcode::IMul32 &&
        lhs_mul->GetOpcode() != IR::Opcode::BitFieldUExtract) {
        return;
    }
    IR::Inst* const lhs_bfe{lhs_mul_optional ? lhs_mul : lhs_mul->Arg(0).TryInstRecursive()};
    if (!lhs_bfe) {
        return;
    }
    if (lhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract) {
        return;
    }
    if (lhs_bfe->Arg(1) != IR::Value{0U} || lhs_bfe->Arg(2) != IR::Value{16U}) {
        return;
    }
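    // The full pattern matched: emit a BitwiseOr32 at the IAdd32's position and redirect all
    // uses of the add to it. Since the operand bits do not overlap, the result is unchanged.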
    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
    inst.ReplaceUsesWith(ir.BitwiseOr(IR::U32{inst.Arg(0)}, IR::U32{inst.Arg(1)}));
}

} // Anonymous namespace

void VendorWorkaroundPass(IR::Program& program) {
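    // Scan every block in post order and apply opcode-specific vendor workarounds.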
    for (IR::Block* const block : program.post_order_blocks) {
        for (IR::Inst& inst : block->Instructions()) {
            switch (inst.GetOpcode()) {
            case IR::Opcode::IAdd32:
                AddingByteSwapsWorkaround(*block, inst);
                break;
            default:
                break;
            }
        }
    }
}

} // namespace Shader::Optimization